Latent Variables active in Random Forests

First, we retrieve the metaviper predictions, latent variable (LV) scores, and random forest weights from Synapse. We then keep only the LVs that were selected by the random forest.

# Pull every row of the Synapse table named by `mp_scores` and drop the rows
# whose isCellLine field equals 'TRUE'.
# NOTE(review): the original comment called these "immune predictions", but
# the columns used later (latent_var, value) look like LV scores - confirm.
dtab <- subset(
  synapser::synTableQuery(paste('select * from', mp_scores))$asDataFrame(),
  isCellLine != 'TRUE'
)
## 
Building the CSV... [######--------------]30.24%   45381/150072       
Building the CSV... [######--------------]30.24%   45381/150072       
Building the CSV... [####################]100.00%   150072/150072   Done...    
Downloading  [##------------------]7.96%   2.0MB/25.1MB (1.7MB/s) Job-103158846784545651976749201.csv     
Downloading  [###-----------------]15.92%   4.0MB/25.1MB (1.8MB/s) Job-103158846784545651976749201.csv     
Downloading  [#####---------------]23.88%   6.0MB/25.1MB (1.8MB/s) Job-103158846784545651976749201.csv     
Downloading  [######--------------]31.85%   8.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv     
Downloading  [########------------]39.81%   10.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv     
Downloading  [##########----------]47.77%   12.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv     
Downloading  [###########---------]55.73%   14.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv     
Downloading  [#############-------]63.69%   16.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv     
Downloading  [##############------]71.65%   18.0MB/25.1MB (1.9MB/s) Job-103158846784545651976749201.csv     
Downloading  [################----]79.61%   20.0MB/25.1MB (2.0MB/s) Job-103158846784545651976749201.csv     
Downloading  [##################--]87.58%   22.0MB/25.1MB (2.0MB/s) Job-103158846784545651976749201.csv     
Downloading  [###################-]95.54%   24.0MB/25.1MB (2.1MB/s) Job-103158846784545651976749201.csv     
Downloading  [####################]100.00%   25.1MB/25.1MB (2.1MB/s) Job-103158846784545651976749201.csv Done...
## Download the complete metaviper score table (`metaviper_scores`) from
## Synapse as a data frame.
mtab <- synapser::synTableQuery(
  paste('select * from', metaviper_scores)
)$asDataFrame()
## 
Building the CSV... [#-------------------]4.93%   72426/1467984       
Building the CSV... [###-----------------]15.80%   231871/1467984       
Building the CSV... [####----------------]20.84%   305867/1467984       
Building the CSV... [######--------------]31.85%   467599/1467984       
Building the CSV... [#########-----------]44.36%   651131/1467984       
Building the CSV... [#########-----------]44.36%   651131/1467984       
Building the CSV... [####################]100.00%   1467984/1467984   Done...    
Downloading  [--------------------]2.47%   2.0MB/81.0MB (2.0MB/s) Job-103158857591122940761569476.csv     
Downloading  [#-------------------]4.94%   4.0MB/81.0MB (2.2MB/s) Job-103158857591122940761569476.csv     
Downloading  [#-------------------]7.40%   6.0MB/81.0MB (2.4MB/s) Job-103158857591122940761569476.csv     
Downloading  [##------------------]9.87%   8.0MB/81.0MB (2.5MB/s) Job-103158857591122940761569476.csv     
Downloading  [##------------------]12.34%   10.0MB/81.0MB (2.5MB/s) Job-103158857591122940761569476.csv     
Downloading  [###-----------------]14.81%   12.0MB/81.0MB (2.6MB/s) Job-103158857591122940761569476.csv     
Downloading  [###-----------------]17.27%   14.0MB/81.0MB (2.7MB/s) Job-103158857591122940761569476.csv     
Downloading  [####----------------]19.74%   16.0MB/81.0MB (2.8MB/s) Job-103158857591122940761569476.csv     
Downloading  [####----------------]22.21%   18.0MB/81.0MB (2.8MB/s) Job-103158857591122940761569476.csv     
Downloading  [#####---------------]24.68%   20.0MB/81.0MB (2.9MB/s) Job-103158857591122940761569476.csv     
Downloading  [#####---------------]27.14%   22.0MB/81.0MB (3.0MB/s) Job-103158857591122940761569476.csv     
Downloading  [######--------------]29.61%   24.0MB/81.0MB (3.0MB/s) Job-103158857591122940761569476.csv     
Downloading  [######--------------]32.08%   26.0MB/81.0MB (3.1MB/s) Job-103158857591122940761569476.csv     
Downloading  [#######-------------]34.55%   28.0MB/81.0MB (3.2MB/s) Job-103158857591122940761569476.csv     
Downloading  [#######-------------]37.01%   30.0MB/81.0MB (3.3MB/s) Job-103158857591122940761569476.csv     
Downloading  [########------------]39.48%   32.0MB/81.0MB (3.4MB/s) Job-103158857591122940761569476.csv     
Downloading  [########------------]41.95%   34.0MB/81.0MB (3.5MB/s) Job-103158857591122940761569476.csv     
Downloading  [#########-----------]44.42%   36.0MB/81.0MB (3.6MB/s) Job-103158857591122940761569476.csv     
Downloading  [#########-----------]46.88%   38.0MB/81.0MB (3.7MB/s) Job-103158857591122940761569476.csv     
Downloading  [##########----------]49.35%   40.0MB/81.0MB (3.8MB/s) Job-103158857591122940761569476.csv     
Downloading  [##########----------]51.82%   42.0MB/81.0MB (3.9MB/s) Job-103158857591122940761569476.csv     
Downloading  [###########---------]54.29%   44.0MB/81.0MB (4.0MB/s) Job-103158857591122940761569476.csv     
Downloading  [###########---------]56.76%   46.0MB/81.0MB (4.1MB/s) Job-103158857591122940761569476.csv     
Downloading  [############--------]59.22%   48.0MB/81.0MB (4.2MB/s) Job-103158857591122940761569476.csv     
Downloading  [############--------]61.69%   50.0MB/81.0MB (4.3MB/s) Job-103158857591122940761569476.csv     
Downloading  [#############-------]64.16%   52.0MB/81.0MB (4.4MB/s) Job-103158857591122940761569476.csv     
Downloading  [#############-------]66.63%   54.0MB/81.0MB (4.5MB/s) Job-103158857591122940761569476.csv     
Downloading  [##############------]69.09%   56.0MB/81.0MB (4.6MB/s) Job-103158857591122940761569476.csv     
Downloading  [##############------]71.56%   58.0MB/81.0MB (4.7MB/s) Job-103158857591122940761569476.csv     
Downloading  [###############-----]74.03%   60.0MB/81.0MB (4.8MB/s) Job-103158857591122940761569476.csv     
Downloading  [###############-----]76.50%   62.0MB/81.0MB (4.8MB/s) Job-103158857591122940761569476.csv     
Downloading  [################----]78.96%   64.0MB/81.0MB (5.0MB/s) Job-103158857591122940761569476.csv     
Downloading  [################----]81.43%   66.0MB/81.0MB (5.0MB/s) Job-103158857591122940761569476.csv     
Downloading  [#################---]83.90%   68.0MB/81.0MB (5.1MB/s) Job-103158857591122940761569476.csv     
Downloading  [#################---]86.37%   70.0MB/81.0MB (5.2MB/s) Job-103158857591122940761569476.csv     
Downloading  [##################--]88.83%   72.0MB/81.0MB (5.3MB/s) Job-103158857591122940761569476.csv     
Downloading  [##################--]91.30%   74.0MB/81.0MB (5.4MB/s) Job-103158857591122940761569476.csv     
Downloading  [###################-]93.77%   76.0MB/81.0MB (5.5MB/s) Job-103158857591122940761569476.csv     
Downloading  [###################-]96.24%   78.0MB/81.0MB (5.6MB/s) Job-103158857591122940761569476.csv     
Downloading  [####################]98.70%   80.0MB/81.0MB (5.6MB/s) Job-103158857591122940761569476.csv     
Downloading  [####################]100.00%   81.0MB/81.0MB (5.7MB/s) Job-103158857591122940761569476.csv Done...
## Fetch the random forest loadings table (`rf_mp`), keep the LV name plus the
## four tumor-type columns, and store the LV name with backticks stripped in
## `latent_var` (the raw LV_Full column is dropped afterwards).
rftab <- synapser::synTableQuery(paste('select * from', rf_mp))$asDataFrame() %>%
  select(
    LV_Full,
    `Cutaneous Neurofibroma`,
    `Neurofibroma`,
    `Malignant Peripheral Nerve Sheath Tumor`,
    `Plexiform Neurofibroma`
  ) %>%
  mutate(latent_var = gsub('`', '', LV_Full)) %>%
  select(-LV_Full)
## 
 [####################]100.00%   1/1   Done...    
Downloading  [####################]100.00%   79.8kB/79.8kB (481.6kB/s) Job-103158862193713213287060949.csv Done...
# Specimen IDs present in both the LV-score table and the metaviper table.
samps <- intersect(dtab$specimenID, mtab$specimenID)

# Get the RF-selected latent variables from Synapse table syn21318452.
# The call is namespace-qualified like the other Synapse queries in this
# document. The LatentVar column is renamed to `latent_var` so it can be used
# as a join key, and the Synapse bookkeeping columns are removed.
lvs <- synapser::synTableQuery("select * from syn21318452")$asDataFrame() %>%
  rename(latent_var = 'LatentVar') %>%
  select(-c(ROW_ID, ROW_VERSION))
## 
 [####################]100.00%   1/1   Done...    
Downloading  [####################]100.00%   3.7kB/3.7kB (1.9MB/s) Job-103158875804676108625138343.csv Done...
# LV scores for the shared specimens, right-joined onto the RF-selected LVs so
# that every row of `lvs` is kept (with its extra columns attached).
mp_res <- dtab %>%
  subset(specimenID %in% samps) %>%
  select(latent_var, value, specimenID, tumorType) %>%
  right_join(lvs, by = 'latent_var')

# Attach the de-duplicated metaviper scores (one row per specimen/gene/sex/
# score combination) to every LV-score row of the same specimen.
combined <- mtab %>%
  select(specimenID, metaviperscore, gene, sex) %>%
  distinct() %>%
  right_join(mp_res, by = 'specimenID')


#now compute some basic stats
#mp_stats<-mp_res%>%
#  rowwise()%>%mutate(All=max(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma))%>%
#  rowwise()%>%
#  mutate(MeanVal=mean(c(`Cutaneous Neurofibroma`,`Plexiform Neurofibroma`,`Malignant Peripheral Nerve Sheath Tumor`,Neurofibroma)))

#DT::datatable(mp_stats)

Plotting protein correlations

With the RF-selected LVs for each random forest prediction, we can plot those metaviper proteins that correlate with them.

# For every (latent variable, gene) pair, correlate the LV score with the
# metaviper score across samples (pairwise-complete observations) and record
# how many distinct specimens contributed.
# (disabled filter: subset(latent_var %in% unique(unlist(top10))))
corVals <- combined %>%
  group_by(latent_var, gene) %>%
  summarize(
    corVal = cor(value, metaviperscore, use = 'pairwise.complete.obs'),
    numSamps = n_distinct(specimenID)
  )

corVals
## # A tibble: 597,016 x 4
## # Groups:   latent_var [98]
##    latent_var               gene    corVal numSamps
##    <chr>                    <chr>    <dbl>    <int>
##  1 1,REACTOME_MRNA_SPLICING AATF    0.436        77
##  2 1,REACTOME_MRNA_SPLICING ABCA1  -0.570        77
##  3 1,REACTOME_MRNA_SPLICING ABCC8  -0.327        77
##  4 1,REACTOME_MRNA_SPLICING ABCC9  -0.619        77
##  5 1,REACTOME_MRNA_SPLICING ABCG1  -0.521        77
##  6 1,REACTOME_MRNA_SPLICING ABCG4   0.239        77
##  7 1,REACTOME_MRNA_SPLICING ABI1   -0.356        77
##  8 1,REACTOME_MRNA_SPLICING ABL1   -0.0887       77
##  9 1,REACTOME_MRNA_SPLICING ABL2   -0.318        77
## 10 1,REACTOME_MRNA_SPLICING ABLIM3 -0.652        77
## # … with 597,006 more rows
## Build a Synapse table object from the correlation results (parent
## syn21046734) and store it in Synapse.
tab <- synBuildTable(
  'RF-selected LVs correlated with Metaviper Activity',
  parent = 'syn21046734',
  corVals
)
synStore(tab)
## 
Uploading [--------------------]0.00%   0.0bytes/27.0MB  file14ab75dcc638f     
Uploading [######--------------]29.63%   8.0MB/27.0MB (626.8kB/s) file14ab75dcc638f     
Uploading [############--------]59.26%   16.0MB/27.0MB (621.1kB/s) file14ab75dcc638f     
Uploading [##################--]88.89%   24.0MB/27.0MB (654.5kB/s) file14ab75dcc638f     
Uploading [####################]100.00%   27.0MB/27.0MB (597.6kB/s) file14ab75dcc638f Done...    
Update: 0 [--------------------]0.00%   0/9223372036854775807       
Update: 0 [--------------------]0.00%   0/9223372036854775807       
Update: 0 [--------------------]0.00%   0/9223372036854775807       
Update: 0 [####################]100.00%   9223372036854775807/9223372036854775807   Done...
## <synapseclient.table.CsvFileTable object at 0x11ecc7f60>
# corVals <- corVals %>% subset(latent_var %in% unique(unlist(top10)))
## now how do we bracket them?
## plot correlation distributions by cell type and method.
# library() stops with an error if ggplot2 is not installed; require() would
# only return FALSE and let the chunk fail later with a less obvious message.
library(ggplot2)

## Reshape the columns that came from `lvs` into long format: one row per
## latent variable / tumor type, with the flag value stored in `top40`.
top.df <- mp_res %>%
  select(-c(specimenID, tumorType, value)) %>%
  gather(key = "tumorType", value = "top40", -latent_var) %>%
  unique()

## Boxplot of the protein correlations for each latent variable in top.df.
p <- corVals %>%
  ungroup() %>%
  subset(latent_var %in% unique(top.df$latent_var)) %>%
  #    mutate(LatentVariable = stringr::str_trim(as.character(latent_var), 20))%>%
  ggplot() +
  geom_boxplot(aes(x = latent_var, y = corVal)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Correlation of metaviper proteins with lv")
print(p)

Some proteins are highly correlated with these latent variables. By choosing a correlation threshold, we can examine them in more detail.

These plots represent the top latent variables for a predictor of each tumor type and the proteins that are correlated with them.

# Minimum correlation a protein must exceed to be reported for a latent
# variable (only positive correlations above this value are kept).
corthresh <- 0.75

## Latent variables that are RF-selected and have at least one protein whose
## correlation exceeds the threshold.
## The membership test uses the latent_var column of `lvs` explicitly;
## unlist(lvs) would also flatten the other columns of `lvs` into the lookup
## set and could match unrelated values.
cor_cell_types <- corVals %>%
  ungroup() %>%
  subset(corVal > corthresh) %>%
  subset(latent_var %in% unique(lvs$latent_var)) %>%
  select(latent_var) %>%
  distinct()

print(paste('we found', nrow(cor_cell_types), 'lvs with some protein correlation greater than', corthresh))
## [1] "we found 52 lvs with some protein correlation greater than 0.75"
DT::datatable(cor_cell_types)

# For every latent variable that passed the threshold, scatter its LV score
# against the metaviper score of its most correlated proteins (at most 12).
#
# lapply() over the latent_var column replaces apply() over the data frame:
# apply() coerces the data frame to a matrix first. The result is wrapped in
# invisible() so the list of plots is not auto-printed after the loop, which
# would print every plot a second time.
invisible(lapply(as.character(cor_cell_types$latent_var), function(ct) {
  # proteins correlated above the threshold with this LV, strongest first
  genes <- subset(corVals, latent_var == ct) %>%
    subset(corVal > corthresh) %>%
    arrange(desc(corVal)) %>%
    ungroup()

  # keep at most 12 proteins; when truncating, the plot title reports the
  # correlation of the weakest protein that was kept
  if (nrow(genes) > 12) {
    new.corthresh <- format(genes$corVal[12], digits = 3)
    genes <- genes[1:12, ]
  } else {
    new.corthresh <- corthresh
  }

  # LV scores and metaviper scores for the selected proteins and this LV
  scores <- subset(combined, gene %in% genes$gene) %>%
    subset(latent_var == ct)

  # tumor types for which this LV is flagged 'Y' in top.df
  dis <- subset(top.df, latent_var == ct) %>%
    subset(top40 == 'Y') %>%
    select(tumorType) %>%
    unique()

  p2 <- ggplot(scores) +
    geom_point(aes(x = value, y = metaviperscore,
                   col = gene, shape = tumorType)) +
    #  scale_x_log10()+
    ggtitle(paste(ct, 'correlation >', new.corthresh, '\n', paste(unlist(dis), collapse = ',')))

  cat(ct) ## print out so we can search
  print(p2)
  # ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
}))
## 1,REACTOME_MRNA_SPLICING

## 13,REACTOME_GLUCOSE_METABOLISM

## 22,KEGG_PPAR_SIGNALING_PATHWAY

## 24,PID_DELTANP63PATHWAY

## 39,SVM Dendritic cells resting

## 4,REACTOME_NEURONAL_SYSTEM

## 45,REACTOME_RNA_POL_I_PROMOTER_OPENING

## 517,REACTOME_SIGNALING_BY_EGFR_IN_CANCER

## 720,PID_FANCONI_PATHWAY

## 767,SVM B cells naive

## 848,REACTOME_GENERIC_TRANSCRIPTION_PATHWAY

## 915,MIPS_SPLICEOSOME

## 928,DMAP_ERY3

## 953,IRIS_Monocyte-Day1

## LV 100

## LV 138

## LV 15

## LV 167

## LV 185

## LV 187

## LV 195

## LV 229

## LV 238

## LV 272

## LV 303

## LV 304

## LV 308

## LV 32

## LV 334

## LV 376

## LV 379

## LV 380

## LV 396

## LV 434

## LV 445

## LV 496

## LV 519

## LV 520

## LV 533

## LV 546

## LV 624

## LV 625

## LV 653

## LV 665

## LV 751

## LV 835

## LV 851

## LV 9

## LV 909

## LV 917

## LV 957

## LV 984

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

## 
## [[33]]

## 
## [[34]]

## 
## [[35]]

## 
## [[36]]

## 
## [[37]]

## 
## [[38]]

## 
## [[39]]

## 
## [[40]]

## 
## [[41]]

## 
## [[42]]

## 
## [[43]]

## 
## [[44]]

## 
## [[45]]

## 
## [[46]]

## 
## [[47]]

## 
## [[48]]

## 
## [[49]]

## 
## [[50]]

## 
## [[51]]

## 
## [[52]]